###########################
##################
##### Replication Rcode file for "The (still) mysterious case of agricultural protectionism"
### Authors: Quynh Nguyen, Gabriele Spilker, and Thomas Bernauer  
#### January 22, 2021


rm(list=ls())
pacman::p_load(foreign, ggplot2, plm, dplyr, reshape2, countrycode, sandwich, lmtest, MASS, 
               rworldmap, RColorBrewer, states, mice, VIM, stargazer, margins, clusterSEs, lme4, optimx,
               coefplot, survey, sampleSelection)
library(foreign)
library(cregg)
library(ggplot2)
library(prediction)
library(ggeffects)
library(dplyr)
library(tidyverse)
library(reshape)
library(ggpubr)
library(psych)
library(moderndive)
library(jtools)
library(dotwhisker)
library(stargazer)
library(cjoint)
library(gridExtra)

setwd("~/Dropbox/Agrar/Data replication") #needs to be adjusted depending on user's own workpath

## Input data 
dfagrar_noncj <- read.dta("agrardata_noncj_rep.dta") 

###################
###Prepare data for analysis
##Define variables

## Outcome variable: Agrar subsidies too low
dfagrar_noncj$farmsupport <- as.numeric(ordered(dfagrar_noncj$agrarsubtoolow,levels=c("1.Too high","2.Somewhat high",
                                                                          "3.Appropriate", "4.Somewhat low", "5.Too low")))

## Treatment: Cost frame 
dfagrar_noncj$treatment <- as.numeric(ordered(dfagrar_noncj$costframe,levels=c("0","1")))

## Covariates 
dfagrar_noncj$age <- dfagrar_noncj$Q1_recode_age
dfagrar_noncj$female <- as.numeric(ordered(dfagrar_noncj$Q2,levels=c("Male","Female")))

dfagrar_noncj$education <- NULL
dfagrar_noncj$education[dfagrar_noncj$Q4_USA_recoded=="NO COL" | dfagrar_noncj$Q4_CH_recoded=="Low"] = "Low"
dfagrar_noncj$education[dfagrar_noncj$Q4_USA_recoded=="SOME COL" | dfagrar_noncj$Q4_CH_recoded=="Medium"] = "Medium"
dfagrar_noncj$education[dfagrar_noncj$Q4_USA_recoded=="COLL+" | dfagrar_noncj$Q4_CH_recoded=="High"] = "High"
dfagrar_noncj$education <- as.numeric(ordered(dfagrar_noncj$education,levels=c("Low","Medium", "High")))

dfagrar_noncj$income <- as.numeric(ordered(dfagrar_noncj$Q22,levels=c("Less than 500 US-$",
                                                          "501 to 1,000 US-$",
                                                          "1,001 to 2,000 US-$",
                                                          "2,001 to 3,000 US-$",
                                                          "3,001 to 4,000 US-$",
                                                          "4,001 to 5,000 US-$",
                                                          "5,001 to 6,000 US-$",
                                                          "6,001 to 7,000 US-$",
                                                          "7,001 to 8,000 US-$",
                                                          "8,001 to 9,000 US-$",
                                                          "9,001 to 10,000 US-$",
                                                          "10,001 US-$ or more")))

dfagrar_noncj$employed <- as.numeric(ordered(dfagrar_noncj$Q23,levels=c("No","Yes")))
dfagrar_noncj$know_farmer <- as.numeric(ordered(dfagrar_noncj$Q26,levels=c("No","Yes")))
dfagrar_noncj$workonfarm <- as.numeric(ordered(dfagrar_noncj$Q28,levels=c("No","Yes")))

#Food security: Higher values, country should be more self-sufficient
dfagrar_noncj$prod_food <- as.numeric(ordered(dfagrar_noncj$Part3_Q5_1_scale,levels=c("Completely Disagree","Mostly Disagree", "Slightly Disagree",
                                                                          "Slightly Agree", "Mostly Agree", "Completely Agree")))  

# Traditionalism: higher values indicate higher importance of tradition
dfagrar_noncj$tradition1 <- as.numeric(ordered(dfagrar_noncj$Part3_Q6_1_scale,levels=c("Completely Disagree","Mostly Disagree", "Slightly Disagree",
                                                                           "Slightly Agree", "Mostly Agree", "Completely Agree")))                                                                           

#Inequality aversion: Higher values, higher preference for equality
dfagrar_noncj$equality <- as.numeric(ordered(dfagrar_noncj$Part3_Q7,levels=c("7 We need income differences as incentive for individual effort.",
                                                                 "6", "5", "4", "3", "2", "1 Income should be distributed equally.")))

#Economic insecurity: Higher values, higher insecurity
dfagrar_noncj$econ_insecure <- as.numeric(ordered(dfagrar_noncj$Part3_Q8,levels=c("Very secure", "Secure", "Somewhat secure", 
                                                                      "Somewhat insecure", "Insecure", "Very insecure")))

#Environmental concern: Higher values, higher concern for environment
dfagrar_noncj$env_concern1 <- as.numeric(ordered(dfagrar_noncj$Part3_Q10,levels=c("Global climate change does not exist.", "… never.", "… not for many years.",
                                                                      "… in the next few years.", "... now.")))

#Nationalism: Higher values, stronger nationalist sentiments 
dfagrar_noncj$nat1 <- as.numeric(ordered(dfagrar_noncj$Part3_Q14_1_scale,levels=c("Completely Disagree", "Mostly Disagree", "Slightly Disagree",
                                                                      "Slightly Agree", "Mostly Agree", "Completely Agree")))
##Subset sample 
#US sample 
dfagrar_us <- subset(dfagrar_noncj, QCountry=="USA")

#CH sample 
dfagrar_ch <- subset(dfagrar_noncj, QCountry=="Switzerland")


#############
##Table 3: Cost experiment – Difference in means test

#US sample 
res <- t.test(farmsupport ~ treatment, data = dfagrar_us, paired = FALSE)
res

res$estimate
res$p.value
res$conf.int

#CH sample 
res1 <- t.test(farmsupport ~ treatment, data = dfagrar_ch, paired = FALSE)
res1

res1$estimate
res1$p.value
res1$conf.int


#####Table 4: Effect of Cost Information on Evaluation for Farmer Subsidies
# Full regressions model with control variables 

## In USA 
model1 <- lm(farmsupport ~ as.factor(treatment) + age + female + education + income + as.factor(employed) +
               as.factor(know_farmer) + as.factor(workonfarm) +
               tradition1 + equality + econ_insecure + env_concern1 + nat1, data = dfagrar_us)
summary(model1)

## In Switzerland
model2 <- lm(farmsupport ~ as.factor(treatment) + age + female + education + income + as.factor(employed) +
               as.factor(know_farmer) + as.factor(workonfarm) +
               tradition1 + equality + econ_insecure + env_concern1 + nat1, data = dfagrar_ch)
summary(model2)

## Get regression outputs 

stargazer(model1,model2,
          title = "Effect of Cost Frame on Evaluation for Farmer Subsidies", align =TRUE,
          font.size = "small", column.sep.width = "1pt",
          out="regression.tex")


##Figure 1: Allocation preferences of budget for agricultural subsidies

##Combine responses from both samples into one dataset 
dfbar <- data.frame(Sample=rep(c("USA", "Switzerland"), each=6),
                    Goal=rep(c("Environment", "Small farmers", "Food(security)", 
                               "Countryside", "Animals", "Food(quality)"),2),
                    Percent=c(14.0, 12.9, 21.2, 13.6, 15.5, 23.0, 18.6, 11.6, 15.8, 12.1, 23.3, 18.6))

head(dfbar)

####Create bar graph for budget allocation question 
ggplot(data=dfbar, aes(x=Goal, y=Percent, fill=Sample)) +
  geom_bar(stat="identity", position=position_dodge()) + 
  geom_text(aes(label=Percent), vjust=1.6, color="white", position = position_dodge(0.9), size=3.5)


####Clear working directory and end script####
